import scrapy
from bs4 import BeautifulSoup
import sys
# Python 2 idiom: ``reload`` is a builtin only on Python 2, and ``sys`` is
# reloaded here so that ``sys.setdefaultencoding`` can be called. The call
# makes UTF-8 the process-wide default for implicit str/unicode conversions.
# NOTE(review): this raises NameError on Python 3 (no builtin ``reload``) --
# confirm the target interpreter before porting.
reload(sys)
sys.setdefaultencoding('utf-8')

class huabanSpider(scrapy.Spider):
    """Crawl huaban.com pin pages and dump the text of their script tags.

    One request is issued per pin id in ``[1, end_num)``. For every response
    with status 200 that reaches ``parse``, the text of all ``<script>``
    elements is concatenated and written to ``<index_num>.txt`` in the
    current working directory.
    """

    name = 'huaban'
    # Not read anywhere in this class.
    # NOTE(review): Scrapy's own attribute for letting non-2xx responses reach
    # a callback is ``handle_httpstatus_list`` (a list of ints) -- confirm
    # whether that was the intent here.
    allow_status = ['500', '400']
    # Incremented once per ``parse`` call and used to name the output file.
    # It tracks the order in which responses arrive, not the pin id in the
    # URL, and because it is incremented before use the first file written
    # is "2.txt".
    index_num = 1
    # Exclusive upper bound of the pin ids to request.
    end_num = 41662204

    def start_requests(self):
        """Yield one ``scrapy.Request`` per pin id from 1 up to ``end_num``.

        ``end_num`` itself is excluded, matching ``range(1, end_num)``.
        """
        # Count lazily instead of calling range(): on Python 2 range() would
        # build the full list of ~41.6 million ints before the first request
        # is yielded.
        stop = self.end_num
        pin_id = 1
        while pin_id < stop:
            yield scrapy.Request('http://huaban.com/pins/%d/' % pin_id)
            pin_id += 1

    def parse(self, response):
        """Write the concatenated ``<script>`` text of a 200 response to disk.

        The counter ``index_num`` is advanced for every response handed to
        this callback, whether or not a file is written for it.
        """
        self.index_num += 1
        if response.status != 200:
            return

        soup = BeautifulSoup(response.body)
        # Join once rather than growing a string with repeated ``+=``.
        script_text = "".join(tag.text for tag in soup.find_all("script"))

        # ``with`` guarantees the handle is closed even if the write fails.
        with open(str(self.index_num) + ".txt", "w") as fp:
            fp.write(script_text)


